/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.search.matchhighlight;

import com.carrotsearch.randomizedtesting.RandomizedTest;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.function.Function;
import java.util.stream.Stream;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.en.PorterStemFilter;
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.synonym.SynonymGraphFilter;
import org.apache.lucene.analysis.synonym.SynonymMap;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.intervals.IntervalQuery;
import org.apache.lucene.queries.intervals.Intervals;
import org.apache.lucene.queryparser.flexible.standard.StandardQueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.util.CharsRef;
import org.hamcrest.MatcherAssert;
import org.hamcrest.Matchers;
import org.junit.Before;
import org.junit.Test;

public class TestMatchHighlighter extends LuceneTestCase {
  private static final String FLD_ID = "id";
  private static final String FLD_TEXT1 = "text1";
  private static final String FLD_TEXT2 = "text2";

  private FieldType TYPE_TEXT_POSITIONS_OFFSETS;
  private FieldType TYPE_TEXT_POSITIONS;

  private PerFieldAnalyzerWrapper analyzer;

  @Before
  public void setup() throws IOException {
    TYPE_TEXT_POSITIONS = TextField.TYPE_STORED;

    TYPE_TEXT_POSITIONS_OFFSETS = new FieldType(TextField.TYPE_STORED);
    TYPE_TEXT_POSITIONS_OFFSETS.setIndexOptions(
        IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
    TYPE_TEXT_POSITIONS_OFFSETS.freeze();

    Map<String, Analyzer> fieldAnalyzers = new HashMap<>();

    // Create an analyzer with some synonyms, just to showcase them.
    SynonymMap synonymMap =
        buildSynonymMap(
            new String[][] {
              {"moon\u0000shine", "firewater"},
              {"firewater", "moon\u0000shine"},
            });

    // Make a non-empty offset gap so that break iterator doesn't go haywire on multivalues
    // glued together.
    final int offsetGap = RandomizedTest.randomIntBetween(1, 2);
    final int positionGap = RandomizedTest.randomFrom(new int[] {0, 1, 100});
    Analyzer synonymsAnalyzer =
        new AnalyzerWithGaps(
            offsetGap,
            positionGap,
            new Analyzer() {
              @Override
              protected TokenStreamComponents createComponents(String fieldName) {
                Tokenizer tokenizer = new WhitespaceTokenizer();
                TokenStream tokenStream = new SynonymGraphFilter(tokenizer, synonymMap, true);
                return new TokenStreamComponents(tokenizer, tokenStream);
              }
            });

    fieldAnalyzers.put(FLD_TEXT1, synonymsAnalyzer);
    fieldAnalyzers.put(FLD_TEXT2, synonymsAnalyzer);

    analyzer = new PerFieldAnalyzerWrapper(new MissingAnalyzer(), fieldAnalyzers);
  }

  static SynonymMap buildSynonymMap(String[][] synonyms) throws IOException {
    SynonymMap.Builder builder = new SynonymMap.Builder();
    for (String[] pair : synonyms) {
      MatcherAssert.assertThat(pair.length, Matchers.equalTo(2));
      builder.add(new CharsRef(pair[0]), new CharsRef(pair[1]), true);
    }
    return builder.build();
  }

  @Test
  public void testBasicUsage() throws Exception {
    new IndexBuilder(this::toField)
        .doc(FLD_TEXT1, "foo bar baz")
        .doc(FLD_TEXT1, "bar foo baz")
        .doc(
            fields -> {
              fields.add(FLD_TEXT1, "Very long content but not matching anything.");
              fields.add(FLD_TEXT2, "no foo but bar");
            })
        .build(
            analyzer,
            reader -> {
              Query query =
                  new BooleanQuery.Builder()
                      .add(new TermQuery(new Term(FLD_TEXT1, "foo")), BooleanClause.Occur.SHOULD)
                      .add(new TermQuery(new Term(FLD_TEXT2, "bar")), BooleanClause.Occur.SHOULD)
                      .build();

              // In the most basic scenario, we run a search against a query, retrieve
              // top docs...
              IndexSearcher searcher = new IndexSearcher(reader);
              Sort sortOrder = Sort.INDEXORDER; // So that results are consistently ordered.
              TopDocs topDocs = searcher.search(query, 10, sortOrder);

              // ...and would want a fixed set of fields from those documents, some of them
              // possibly highlighted if they matched the query.
              //
              // This configures the highlighter so that the FLD_ID field is always returned
              // verbatim,
              // and FLD_TEXT1 is returned *only if it contained a query match*.
              MatchHighlighter highlighter =
                  new MatchHighlighter(searcher, analyzer)
                      .appendFieldHighlighter(FieldValueHighlighters.verbatimValue(FLD_ID))
                      .appendFieldHighlighter(
                          FieldValueHighlighters.highlighted(
                              80 * 3, 1, new PassageFormatter("...", ">", "<"), FLD_TEXT1::equals))
                      .appendFieldHighlighter(FieldValueHighlighters.skipRemaining());

              // Note document field highlights are a stream over documents in topDocs. In the
              // remaining code we will just
              // collect them on the fly into a preformatted string.
              Stream<MatchHighlighter.DocHighlights> highlights =
                  highlighter.highlight(topDocs, query);
              assertHighlights(
                  toDocList(highlights),
                  " 0. id: 0",
                  "    text1: >foo< bar baz",
                  " 1. id: 1",
                  "    text1: bar >foo< baz",
                  " 2. id: 2");

              // In a more realistic use case, you'd want to show the value of a given field
              // *regardless* of whether it
              // contained a highlight or not -- it is odd that document "id: 2" above doesn't have
              // the 'text1' field
              // shown because that field wasn't part of the query match.
              //
              // Let's say the field is also potentially long; if it contains a match,
              // we would want to display the contextual snippet surrounding that match. If it does
              // not contain any
              // matches, we would want to display its content up to a given number of characters
              // (lead lines).
              //
              // Let's do this by adding an appropriate field highlighter on FLD_TEXT1.
              highlighter =
                  new MatchHighlighter(searcher, analyzer)
                      .appendFieldHighlighter(FieldValueHighlighters.verbatimValue(FLD_ID))
                      .appendFieldHighlighter(
                          FieldValueHighlighters.highlighted(
                              80 * 3, 1, new PassageFormatter("...", ">", "<"), FLD_TEXT1::equals))
                      .appendFieldHighlighter(
                          FieldValueHighlighters.maxLeadingCharacters(10, "...", Set.of(FLD_TEXT1)))
                      .appendFieldHighlighter(FieldValueHighlighters.skipRemaining());

              assertHighlights(
                  toDocList(highlighter.highlight(topDocs, query)),
                  " 0. id: 0",
                  "    text1: >foo< bar baz",
                  " 1. id: 1",
                  "    text1: bar >foo< baz",
                  " 2. id: 2",
                  "    text1: Very long...");

              // Field highlighters can apply to multiple fields and be chained for convenience.
              // For example, this defines a combined highlighter over both FLD_TEXT1 and FLD_TEXT2.
              Set<String> fields = Set.of(FLD_TEXT1, FLD_TEXT2);
              MatchHighlighter.FieldValueHighlighter highlightedOrAbbreviated =
                  FieldValueHighlighters.highlighted(
                          80 * 3, 1, new PassageFormatter("...", ">", "<"), fields::contains)
                      .or(FieldValueHighlighters.maxLeadingCharacters(10, "...", fields));

              highlighter =
                  new MatchHighlighter(searcher, analyzer)
                      .appendFieldHighlighter(FieldValueHighlighters.verbatimValue(FLD_ID))
                      .appendFieldHighlighter(highlightedOrAbbreviated)
                      .appendFieldHighlighter(FieldValueHighlighters.skipRemaining());

              assertHighlights(
                  toDocList(highlighter.highlight(topDocs, query)),
                  " 0. id: 0",
                  "    text1: >foo< bar baz",
                  " 1. id: 1",
                  "    text1: bar >foo< baz",
                  " 2. id: 2",
                  "    text1: Very long...",
                  "    text2: no foo but >bar<");
            });
  }

  @Test
  public void testSynonymHighlight() throws Exception {
    // There is nothing special needed to highlight or process complex queries, synonyms, etc.
    // Synonyms defined in the constructor of this class.
    new IndexBuilder(this::toField)
        .doc(FLD_TEXT1, "Where the moon shine falls, firewater flows.")
        .build(
            analyzer,
            reader -> {
              IndexSearcher searcher = new IndexSearcher(reader);
              Sort sortOrder = Sort.INDEXORDER; // So that results are consistently ordered.

              MatchHighlighter highlighter =
                  new MatchHighlighter(searcher, analyzer)
                      .appendFieldHighlighter(
                          FieldValueHighlighters.highlighted(
                              80 * 3, 1, new PassageFormatter("...", ">", "<"), FLD_TEXT1::equals))
                      .appendFieldHighlighter(FieldValueHighlighters.skipRemaining());

              Query query = new TermQuery(new Term(FLD_TEXT1, "firewater"));
              assertHighlights(
                  toDocList(highlighter.highlight(searcher.search(query, 10, sortOrder), query)),
                  "0. text1: Where the >moon shine< falls, >firewater< flows.");

              query = new PhraseQuery(FLD_TEXT1, "moon", "shine");
              assertHighlights(
                  toDocList(highlighter.highlight(searcher.search(query, 10, sortOrder), query)),
                  "0. text1: Where the >moon shine< falls, >firewater< flows.");
            });
  }

  @Test
  public void testAnalyzedTextIntervals() throws Exception {
    SynonymMap synonymMap =
        buildSynonymMap(
            new String[][] {
              {"moon\u0000shine", "firewater"},
              {"firewater", "moon\u0000shine"},
            });

    Analyzer analyzer =
        new Analyzer() {
          @Override
          protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer tokenizer = new StandardTokenizer();
            TokenStream ts = tokenizer;
            ts = new LowerCaseFilter(ts);
            ts = new SynonymGraphFilter(ts, synonymMap, true);
            ts = new PorterStemFilter(ts);
            return new TokenStreamComponents(tokenizer, ts);
          }
        };

    new IndexBuilder(this::toField)
        .doc(FLD_TEXT1, "Where the moon shine falls, firewater flows.")
        .build(
            analyzer,
            reader -> {
              IndexSearcher searcher = new IndexSearcher(reader);
              Sort sortOrder = Sort.INDEXORDER; // So that results are consistently ordered.

              MatchHighlighter highlighter =
                  new MatchHighlighter(searcher, analyzer)
                      .appendFieldHighlighter(
                          FieldValueHighlighters.highlighted(
                              80 * 3, 1, new PassageFormatter("...", ">", "<"), FLD_TEXT1::equals))
                      .appendFieldHighlighter(FieldValueHighlighters.skipRemaining());

              {
                // [moon shine, firewater] are synonyms, tokens are lowercased. Porter stemming on.
                Query query =
                    new IntervalQuery(
                        FLD_TEXT1,
                        Intervals.analyzedText("Firewater Fall", analyzer, FLD_TEXT1, 0, true));

                assertHighlights(
                    toDocList(highlighter.highlight(searcher.search(query, 10, sortOrder), query)),
                    "0. text1: Where the >moon shine falls<, firewater flows.");
              }
            });
  }

  @Test
  public void testStandardQueryParserIntervalFunctions() throws Exception {
    Analyzer analyzer =
        new Analyzer() {
          @Override
          protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer tokenizer = new StandardTokenizer();
            TokenStream ts = tokenizer;
            ts = new LowerCaseFilter(ts);
            return new TokenStreamComponents(tokenizer, ts);
          }
        };

    // Rerun the same test on fields with offsets and without offsets.
    for (String field : List.of(FLD_TEXT1, FLD_TEXT2)) {
      String inputDocument = "The quick brown fox jumps over the lazy dog";

      String[][] queryResultPairs =
          new String[][] {
            {"fn:ordered(brown dog)", "0. %s: The quick >brown fox jumps over the lazy dog<"},
            {
              "fn:within(fn:or(lazy quick) 1 fn:or(dog fox))",
              "0. %s: The quick brown fox jumps over the >lazy< dog"
            },
            {
              "fn:containedBy(fox fn:ordered(brown fox dog))",
              "0. %s: The quick brown >fox< jumps over the lazy dog"
            },
            {
              "fn:atLeast(2 quick fox \"furry dog\")",
              "0. %s: The >quick brown fox< jumps over the lazy dog"
            },
            {
              "fn:maxgaps(0 fn:ordered(fn:or(quick lazy) fn:or(fox dog)))",
              "0. %s: The quick brown fox jumps over the >lazy dog<"
            },
            {
              "fn:maxgaps(1 fn:ordered(fn:or(quick lazy) fn:or(fox dog)))",
              "0. %s: The >quick brown fox< jumps over the >lazy dog<"
            },
            {
              "fn:maxwidth(2 fn:ordered(fn:or(quick lazy) fn:or(fox dog)))",
              "0. %s: The quick brown fox jumps over the >lazy dog<"
            },
            {
              "fn:maxwidth(3 fn:ordered(fn:or(quick lazy) fn:or(fox dog)))",
              "0. %s: The >quick brown fox< jumps over the >lazy dog<"
            },
            {"fn:or(quick \"fox\")", "0. %s: The >quick< brown >fox< jumps over the lazy dog"},
            {"fn:or(\"quick fox\")"},
            {"fn:phrase(quick brown fox)", "0. %s: The >quick brown fox< jumps over the lazy dog"},
            {"fn:wildcard(jump*)", "0. %s: The quick brown fox >jumps< over the lazy dog"},
            {"fn:wildcard(br*n)", "0. %s: The quick >brown< fox jumps over the lazy dog"},
            {"fn:fuzzyTerm(fxo)", "0. %s: The quick brown >fox< jumps over the lazy dog"},
            {"fn:or(dog fox)", "0. %s: The quick brown >fox< jumps over the lazy >dog<"},
            {
              "fn:phrase(fn:ordered(quick fox) jumps)",
              "0. %s: The >quick brown fox jumps< over the lazy dog"
            },
            {"fn:ordered(quick jumps dog)", "0. %s: The >quick brown fox jumps over the lazy dog<"},
            {
              "fn:ordered(quick fn:or(fox dog))",
              "0. %s: The >quick brown fox< jumps over the lazy dog"
            },
            {
              "fn:ordered(quick jumps fn:or(fox dog))",
              "0. %s: The >quick brown fox jumps over the lazy dog<"
            },
            {
              "fn:unordered(dog jumps quick)",
              "0. %s: The >quick brown fox jumps over the lazy dog<"
            },
            {
              "fn:unordered(fn:or(fox dog) quick)",
              "0. %s: The >quick brown fox< jumps over the lazy dog"
            },
            {
              "fn:unordered(fn:phrase(brown fox) fn:phrase(fox jumps))",
              "0. %s: The quick >brown fox jumps< over the lazy dog"
            },
            {"fn:ordered(fn:phrase(brown fox) fn:phrase(fox jumps))"},
            {"fn:unorderedNoOverlaps(fn:phrase(brown fox) fn:phrase(fox jumps))"},
            {
              "fn:before(fn:or(brown lazy) fox)",
              "0. %s: The quick >brown< fox jumps over the lazy dog"
            },
            {
              "fn:before(fn:or(brown lazy) fn:or(dog fox))",
              "0. %s: The quick >brown< fox jumps over the >lazy< dog"
            },
            {
              "fn:after(fn:or(brown lazy) fox)",
              "0. %s: The quick brown fox jumps over the >lazy< dog"
            },
            {
              "fn:after(fn:or(brown lazy) fn:or(dog fox))",
              "0. %s: The quick brown fox jumps over the >lazy< dog"
            },
            {
              "fn:within(fn:or(fox dog) 1 fn:or(quick lazy))",
              "0. %s: The quick brown fox jumps over the lazy >dog<"
            },
            {
              "fn:within(fn:or(fox dog) 2 fn:or(quick lazy))",
              "0. %s: The quick brown >fox< jumps over the lazy >dog<"
            },
            {
              "fn:notWithin(fn:or(fox dog) 1 fn:or(quick lazy))",
              "0. %s: The quick brown >fox< jumps over the lazy dog"
            },
            {
              "fn:containedBy(fn:or(fox dog) fn:ordered(quick lazy))",
              "0. %s: The quick brown >fox< jumps over the lazy dog"
            },
            {
              "fn:notContainedBy(fn:or(fox dog) fn:ordered(quick lazy))",
              "0. %s: The quick brown fox jumps over the lazy >dog<"
            },
            {
              "fn:containing(fn:atLeast(2 quick fox dog) jumps)",
              "0. %s: The quick brown >fox jumps over the lazy dog<"
            },
            {
              "fn:notContaining(fn:ordered(fn:or(the The) fn:or(fox dog)) brown)",
              "0. %s: The quick brown fox jumps over >the lazy dog<"
            },
            {
              "fn:overlapping(fn:phrase(brown fox) fn:phrase(fox jumps))",
              "0. %s: The quick >brown fox< jumps over the lazy dog"
            },
            {
              "fn:overlapping(fn:or(fox dog) fn:extend(lazy 2 2))",
              "0. %s: The quick brown fox jumps over the lazy >dog<"
            },
            {
              "fn:nonOverlapping(fn:phrase(brown fox) fn:phrase(lazy dog))",
              "0. %s: The quick >brown fox< jumps over the lazy dog"
            },
            {
              "fn:nonOverlapping(fn:or(fox dog) fn:extend(lazy 2 2))",
              "0. %s: The quick brown >fox< jumps over the lazy dog"
            },
            {
              "fn:atLeast(2 fn:unordered(furry dog) fn:unordered(brown dog) lazy quick)",
              "0. %s: The >quick >brown fox jumps over the lazy<<> dog<"
            },
            {"fn:extend(fox 1 2)", "0. %s: The quick >brown fox jumps over< the lazy dog"},
            {
              "fn:extend(fn:or(dog fox) 2 0)",
              "0. %s: The >quick brown fox< jumps over >the lazy dog<"
            },
            {
              "fn:containedBy(fn:or(fox dog) fn:extend(lazy 3 3))",
              "0. %s: The quick brown fox jumps over the lazy >dog<"
            },
            {
              "fn:notContainedBy(fn:or(fox dog) fn:extend(lazy 3 3))",
              "0. %s: The quick brown >fox< jumps over the lazy dog"
            },
            {
              "fn:containing(fn:extend(fn:or(lazy brown) 1 1) fn:or(fox dog))",
              "0. %s: The >quick brown fox< jumps over >the lazy dog<"
            },
            {
              "fn:notContaining(fn:extend(fn:or(fox dog) 1 0) fn:or(brown yellow))",
              "0. %s: The quick brown fox jumps over the >lazy dog<"
            }
          };

      // Verify assertions.
      new IndexBuilder(this::toField)
          // Just one document and multiple interval queries to check.
          .doc(field, inputDocument)
          .build(
              analyzer,
              reader -> {
                IndexSearcher searcher = new IndexSearcher(reader);
                Sort sortOrder = Sort.INDEXORDER; // So that results are consistently ordered.

                MatchHighlighter highlighter =
                    new MatchHighlighter(searcher, analyzer)
                        .appendFieldHighlighter(
                            FieldValueHighlighters.highlighted(
                                80 * 3, 1, new PassageFormatter("...", ">", "<"), _ -> true))
                        .appendFieldHighlighter(FieldValueHighlighters.skipRemaining());

                StandardQueryParser qp = new StandardQueryParser(analyzer);

                // Run all pairs of query-expected highlight.
                List<String> errors = new ArrayList<>();
                for (var queryHighlightPair : queryResultPairs) {
                  assert queryHighlightPair.length >= 1;
                  String queryString = queryHighlightPair[0];
                  var query = qp.parse(queryString, field);
                  var expected =
                      Arrays.stream(queryHighlightPair)
                          .skip(1)
                          .map(v -> String.format(Locale.ROOT, v, field))
                          .toArray(String[]::new);

                  try {
                    assertHighlights(
                        toDocList(
                            highlighter.highlight(searcher.search(query, 10, sortOrder), query)),
                        expected);
                  } catch (AssertionError e) {
                    errors.add("MISMATCH: query: " + queryString + "\n" + e.getMessage());
                  }
                }
                if (errors.size() > 0) {
                  throw new AssertionError(String.join("\n\n", errors));
                }
              });
    }
  }

  @Test
  public void testCustomFieldHighlightHandling() throws Exception {
    // Match highlighter is a showcase of individual components in this package, suitable
    // to create any kind of field-display designs.
    //
    // In this example we will build a custom field highlighting handler that
    // highlights matches over a multivalued field, shows that field's values if it received
    // no matches and limits the number of values displayed to at most 2 (with an appropriate
    // message).
    new IndexBuilder(this::toField)
        // Just one document, one field, four values.
        .doc(FLD_TEXT1, "foo bar", "bar foo baz", "bar baz foo", "baz baz baz")
        .build(
            analyzer,
            reader -> {
              IndexSearcher searcher = new IndexSearcher(reader);
              Sort sortOrder = Sort.INDEXORDER;

              // Let's start with the simple predefined highlighter so that the field's value shows
              // and is highlighted when it was part of the hit.
              MatchHighlighter.FieldValueHighlighter highlighted =
                  FieldValueHighlighters.highlighted(
                      80 * 3, 2, new PassageFormatter("...", ">", "<"), FLD_TEXT1::equals);
              MatchHighlighter highlighter =
                  new MatchHighlighter(searcher, analyzer)
                      .appendFieldHighlighter(highlighted)
                      .appendFieldHighlighter(FieldValueHighlighters.skipRemaining());

              Query query = new TermQuery(new Term(FLD_TEXT1, "foo"));
              TopDocs topDocs = searcher.search(query, 10, sortOrder);

              // Note the highlighter is configured with at most 2 snippets so the match on the
              // third value ("bar baz foo") is omitted. Ellipsis isn't inserted too because
              // values are displayed in full.
              assertHighlights(
                  toDocList(highlighter.highlight(topDocs, query)),
                  "0. text1: >foo< bar, bar >foo< baz");

              // So the above works fine if the field received a match but omits it otherwise. We
              // can
              // force the display of this field by chaining with verbatim value highlighter:
              highlighter =
                  new MatchHighlighter(searcher, analyzer)
                      .appendFieldHighlighter(
                          highlighted.or(FieldValueHighlighters.verbatimValue(FLD_TEXT1)))
                      .appendFieldHighlighter(FieldValueHighlighters.skipRemaining());

              assertHighlights(
                  toDocList(highlighter.highlight(topDocs, new MatchAllDocsQuery())),
                  "0. text1: foo bar, bar foo baz, bar baz foo, baz baz baz");

              // But this is not exactly what we'd like because we want to limit the display of
              // values to the first two.
              // Let's just write a custom field highlighter handler that does it.
              class AtMostNValuesHighlighter implements MatchHighlighter.FieldValueHighlighter {
                private final String field;
                private final int limit;

                AtMostNValuesHighlighter(String field, int limit) {
                  this.field = field;
                  this.limit = limit;
                }

                @Override
                public boolean isApplicable(String field, boolean hasMatches) {
                  return Objects.equals(field, this.field);
                }

                @Override
                public List<String> format(
                    String field,
                    List<String> values,
                    String contiguousValue,
                    List<OffsetRange> valueRanges,
                    List<MatchHighlighter.QueryOffsetRange> matchOffsets) {
                  if (values.size() <= limit) {
                    return values;
                  } else {
                    List<String> collected = values.subList(0, limit);
                    int remaining = values.size() - collected.size();
                    collected.add(String.format(Locale.ROOT, "[%d omitted]", remaining));
                    return collected;
                  }
                }

                @Override
                public Collection<String> alwaysFetchedFields() {
                  return Collections.singleton(field);
                }
              }

              // We can now chain it as usual and contemplate the result.
              highlighter =
                  new MatchHighlighter(searcher, analyzer)
                      .appendFieldHighlighter(
                          highlighted.or(new AtMostNValuesHighlighter(FLD_TEXT1, 2)))
                      .appendFieldHighlighter(FieldValueHighlighters.skipRemaining());

              assertHighlights(
                  toDocList(highlighter.highlight(topDocs, query)),
                  "0. text1: >foo< bar, bar >foo< baz");
              assertHighlights(
                  toDocList(highlighter.highlight(topDocs, new MatchAllDocsQuery())),
                  "0. text1: foo bar, bar foo baz, [2 omitted]");
            });
  }

  @Test
  public void testHighlightMoreQueriesAtOnceShowoff() throws Exception {
    // Match highlighter underlying components are powerful enough to build interesting,
    // if not always super-practical, things. In this case, we would like to highlight
    // a set of matches of *more than one* query over the same set of input documents. This includes
    // highest-scoring passage resolution (from multiple hits) and different highlight markers
    // for each query.
    new IndexBuilder(this::toField)
        .doc(FLD_TEXT1, "foo bar baz")
        .doc(FLD_TEXT1, "foo baz bar")
        .build(
            analyzer,
            reader -> {
              // Let's start with the two queries. The first one will be an unordered
              // query for (foo, baz) with a max gap of 1; let's use intervals for this.
              Query q1 =
                  new IntervalQuery(
                      FLD_TEXT1,
                      Intervals.maxgaps(
                          1, Intervals.unordered(Intervals.term("foo"), Intervals.term("baz"))));

              // The second one will be a simpler term query for "bar".
              Query q2 = new TermQuery(new Term(FLD_TEXT1, "bar"));

              // Let's fetch matching documents by combining the two into a Boolean query.
              Query query =
                  new BooleanQuery.Builder()
                      .add(q1, BooleanClause.Occur.SHOULD)
                      .add(q2, BooleanClause.Occur.SHOULD)
                      .build();

              IndexSearcher searcher = new IndexSearcher(reader);
              Sort sortOrder = Sort.INDEXORDER; // So that results are consistently ordered.
              TopDocs topDocs = searcher.search(query, 10, sortOrder);

              // If we use the "regular" highlighter, the result will be slightly odd: a nested
              // highlight over "bar" within the first match. Also, you can't distinguish which of
              // the sub-queries
              // caused which highlight marker... but if it were HTML then you could give the span
              // some semi-translucent background and layered matches would be visible.
              MatchHighlighter highlighter =
                  new MatchHighlighter(searcher, analyzer)
                      .appendFieldHighlighter(
                          FieldValueHighlighters.highlighted(
                              80 * 3,
                              1,
                              new PassageFormatter("...", "<span>", "</span>"),
                              FLD_TEXT1::equals))
                      .appendFieldHighlighter(FieldValueHighlighters.skipRemaining());

              assertHighlights(
                  toDocList(highlighter.highlight(topDocs, query)),
                  "0. text1: <span>foo <span>bar</span> baz</span>",
                  "1. text1: <span>foo baz</span> <span>bar</span>");

              // To separate highlights for multiple queries we'll pass them separately to the
              // highlighter and differentiate highlight markers upon their application. Let's start
              // with the customized
              // field highlighter first. This utilizes the fact that match ranges passed from
              // MatchHighlighter
              // contain a reference to the original query which brought up the match.
              class SeparateMarkerFieldHighlighter
                  implements MatchHighlighter.FieldValueHighlighter {
                private final String field;
                private final Map<Query, String> queryClassMap;

                SeparateMarkerFieldHighlighter(String field, Map<Query, String> queryClassMap) {
                  this.field = field;
                  this.queryClassMap = queryClassMap;
                }

                @Override
                public boolean isApplicable(String field, boolean hasMatches) {
                  return Objects.equals(field, this.field) && hasMatches;
                }

                @Override
                public List<String> format(
                    String field,
                    List<String> values,
                    String contiguousValue,
                    List<OffsetRange> valueRanges,
                    List<MatchHighlighter.QueryOffsetRange> matchOffsets) {
                  PassageSelector passageSelector = new PassageSelector();
                  int maxPassageWindow = 80;
                  int maxPassages = 3;
                  List<Passage> bestPassages =
                      passageSelector.pickBest(
                          contiguousValue,
                          matchOffsets,
                          maxPassageWindow,
                          maxPassages,
                          valueRanges);

                  // We know the offset ranges passed to us by MatchHighlighter are instances of
                  // QueryOffsetRange
                  // so we compute the class based on that.
                  Function<OffsetRange, String> queryToClass =
                      (range) ->
                          queryClassMap.get(((MatchHighlighter.QueryOffsetRange) range).query);

                  PassageFormatter passageFormatter =
                      new PassageFormatter(
                          "...",
                          (range) -> "<span class='" + queryToClass.apply(range) + "'>",
                          (_) -> "</span>");

                  return passageFormatter.format(contiguousValue, bestPassages, valueRanges);
                }
              }

              // And this is pretty much it. We now set up query classes to display, set up the
              // highlighter...
              Map<Query, String> queryClassMap = Map.of(q1, "q1", q2, "q2");
              highlighter =
                  new MatchHighlighter(searcher, analyzer)
                      .appendFieldHighlighter(
                          new SeparateMarkerFieldHighlighter(FLD_TEXT1, queryClassMap))
                      .appendFieldHighlighter(FieldValueHighlighters.skipRemaining());

              // ...and run highlighting. Note the query passed to the highlighter are individual
              // sub-clauses
              // of the Boolean query used to fetch documents.
              assertHighlights(
                  toDocList(highlighter.highlight(topDocs, q1, q2)),
                  "0. text1: <span class='q1'>foo <span class='q2'>bar</span> baz</span>",
                  "1. text1: <span class='q1'>foo baz</span> <span class='q2'>bar</span>");
            });
  }

  private void assertHighlights(List<List<String>> docList, String... expectedFormattedLines) {
    ArrayList<String> actualLines = new ArrayList<>();
    for (int doc = 0; doc < docList.size(); doc++) {
      List<String> fields = docList.get(doc);
      for (int i = 0; i < fields.size(); i++) {
        actualLines.add(
            (i == 0 ? String.format(Locale.ROOT, "%2d. ", doc) : "    ") + fields.get(i));
      }
    }

    var expectedTrimmed = Stream.of(expectedFormattedLines).map(String::trim).toList();
    var actualTrimmed = actualLines.stream().map(String::trim).toList();
    if (!Objects.equals(expectedTrimmed, actualTrimmed)) {
      throw new AssertionError(
          "Actual hits were:\n"
              + String.join("\n", actualTrimmed)
              + "\n\nbut expected them to be:\n"
              + String.join("\n", expectedTrimmed));
    }
  }

  private List<List<String>> toDocList(Stream<MatchHighlighter.DocHighlights> highlights) {
    return highlights
        .map(
            docHighlights ->
                docHighlights.fields.entrySet().stream()
                    .map(e -> e.getKey() + ": " + String.join(", ", e.getValue()))
                    .toList())
        .toList();
  }

  private IndexableField toField(String name, String value) {
    switch (name) {
      case FLD_TEXT1:
        return new Field(name, value, TYPE_TEXT_POSITIONS_OFFSETS);
      case FLD_TEXT2:
        return new Field(name, value, TYPE_TEXT_POSITIONS);
      default:
        throw new AssertionError("Don't know how to handle this field: " + name);
    }
  }
}
